import argparse


def _build_parser():
  """Build the argument parser holding every tunable hyper-parameter.

  Help texts state the actual argparse defaults.  Note that most of these
  values are overridden per-dataset by the _cfg_* helpers below, so the
  CLI flags mainly matter for datasets without a hard-coded branch.
  """
  parser = argparse.ArgumentParser(description='MOSEI Sentiment Analysis')
  # NOTE(review): presumably absorbs the '-f <connection file>' flag that
  # Jupyter/IPython passes when this runs inside a notebook -- confirm
  # before removing.
  parser.add_argument('-f', default='', type=str)

  # Fixed
  parser.add_argument(
      '--model',
      type=str,
      default='mult',
      help='name of the model to use (mult, cdcn)')
  parser.add_argument(
      '--dataset',
      type=str,
      default='iemocap',
      help='dataset to use (default: iemocap)')
  parser.add_argument(
      '--aligned',
      action='store_true',
      help='consider aligned experiment or not (default: False)')

  # Tasks: 1 enables the crossmodal fusion into that modality, 0 disables.
  parser.add_argument(
      '--v_enable',
      type=int,
      default=1,
      help='use the crossmodal fusion into v (default: 1)')
  parser.add_argument(
      '--a_enable',
      type=int,
      default=1,
      help='use the crossmodal fusion into a (default: 1)')
  parser.add_argument(
      '--l_enable',
      type=int,
      default=1,
      help='use the crossmodal fusion into l (default: 1)')

  # Dropouts
  parser.add_argument(
      '--attn_dropout', type=float, default=0.1, help='attention dropout')
  parser.add_argument(
      '--attn_dropout_a',
      type=float,
      default=0.0,
      help='attention dropout (for audio)')
  parser.add_argument(
      '--attn_dropout_v',
      type=float,
      default=0.0,
      help='attention dropout (for visual)')
  parser.add_argument(
      '--relu_dropout', type=float, default=0.1, help='relu dropout')
  parser.add_argument(
      '--embed_dropout', type=float, default=0.25, help='embedding dropout')
  parser.add_argument(
      '--res_dropout', type=float, default=0.1, help='residual block dropout')
  parser.add_argument(
      '--out_dropout', type=float, default=0.0, help='output layer dropout')

  # Architecture
  parser.add_argument(
      '--nlayers',
      type=int,
      default=4,
      help='number of layers in the network (default: 4)')
  parser.add_argument(
      '--dims_per_head',
      type=int,
      default=4,
      help='dimensions per attention head (default: 4)')
  parser.add_argument(
      '--num_heads',
      type=int,
      default=10,
      help='number of heads for the transformer network (default: 10)')
  # store_false: passing --attn_mask DISABLES the mask (default: True).
  parser.add_argument(
      '--attn_mask',
      action='store_false',
      help='use attention mask for Transformer (default: True)')

  # Tuning
  parser.add_argument(
      '--batch_size',
      type=int,
      default=16,
      metavar='N',
      help='batch size (default: 16)')
  parser.add_argument(
      '--clip',
      type=float,
      default=1.0,
      help='gradient clip value (default: 1.0)')
  parser.add_argument(
      '--lr',
      type=float,
      default=1e-3,
      help='initial learning rate (default: 1e-3)')
  parser.add_argument(
      '--optim',
      type=str,
      default='Adam',
      help='optimizer to use (default: Adam)')
  parser.add_argument(
      '--num_epochs',
      type=int,
      default=20,
      help='number of epochs (default: 20)')
  parser.add_argument(
      '--when',
      type=int,
      default=20,
      help='when to decay learning rate (default: 20)')
  parser.add_argument(
      '--es',
      type=float,
      default=1.,
      help='when to early stop, multiplied by when (default: 1.0)')
  parser.add_argument(
      '--batch_chunk',
      type=int,
      default=1,
      help='number of chunks per batch (default: 1)')

  # Logistics
  parser.add_argument(
      '--train_chunk',
      type=int,
      default=3,
      help='divide training set into chunks for higher eval frequency.')
  parser.add_argument('--no_cuda', action='store_true', help='do not use cuda')
  parser.add_argument(
      '--fusion_type',
      type=str,
      default='dc',
      help='transformer attention or dynamic convolution')
  parser.add_argument(
      '--fusion_layer_type',
      type=str,
      default='cal',
      help='fusion layer variant (default: cal)')
  parser.add_argument(
      '--kernel_size',
      type=int,
      default=1,
      help='kernel size offset for all the ccc/cdc')
  parser.add_argument(
      '--kernel_size_a',
      type=int,
      default=1,
      help='kernel size of temp convolution')
  parser.add_argument(
      '--kernel_size_v',
      type=int,
      default=1,
      help='kernel size of temp convolution')
  parser.add_argument(
      '--kernel_size_l',
      type=int,
      default=1,
      help='kernel size of temp convolution')
  parser.add_argument(
      '--kernel_size_av',
      type=int,
      default=4,
      help='kernel size of cross-modal dynamic convolution (a->v)')
  parser.add_argument(
      '--kernel_size_al',
      type=int,
      default=4,
      help='kernel size of cross-modal dynamic convolution (a->l)')
  parser.add_argument(
      '--kernel_size_va',
      type=int,
      default=4,
      help='kernel size of cross-modal dynamic convolution (v->a)')
  parser.add_argument(
      '--kernel_size_vl',
      type=int,
      default=4,
      help='kernel size of cross-modal dynamic convolution (v->l)')
  parser.add_argument(
      '--kernel_size_la',
      type=int,
      default=4,
      help='kernel size of cross-modal dynamic convolution (l->a)')
  parser.add_argument(
      '--kernel_size_lv',
      type=int,
      default=4,
      help='kernel size of cross-modal dynamic convolution (l->v)')
  return parser


def _cfg_iemocap(args):
  """Apply the hard-coded IEMOCAP settings (8-way classification)."""
  args.orig_d_l, args.orig_d_a, args.orig_d_v = 300, 74, 35
  args.output_dim = 8
  args.criterion = 'CrossEntropyLoss'

  args.kernel_size_a = 2
  args.kernel_size_v = 1
  args.kernel_size_l = 1
  if not args.aligned:
    args.batch_chunk = 2
    args.kernel_size_av = 1
    args.kernel_size_al = 6
    args.kernel_size_la = 14
    args.kernel_size_lv = 3
    args.kernel_size_va = 15
    args.kernel_size_vl = 8
    args.lr = 0.00018
    args.l_len, args.a_len, args.v_len = 20, 400, 500
  else:
    args.batch_chunk = 1
    args.kernel_size_av = 5
    args.kernel_size_al = 5
    args.kernel_size_la = 5
    args.kernel_size_lv = 5
    args.kernel_size_va = 5
    args.kernel_size_vl = 5
    args.lr = 0.0005
    args.l_len, args.a_len, args.v_len = 20, 20, 20
  args.train_chunk = 3
  # Decay step and epoch count both scale with the eval-frequency multiplier.
  args.when *= args.train_chunk
  args.model = 'mult'
  args.fusion_type = 'dc'
  args.fusion_layer_type = 'cal'
  args.batch_size = 32
  args.optim = 'Adam'
  args.nlayers = 4
  args.dims_per_head = 4
  args.num_heads = 10
  args.embed_dropout = 0.3
  args.attn_dropout = 0.25
  args.attn_dropout_l = 0.25
  args.attn_dropout_a = 0.25
  args.attn_dropout_v = 0.25
  args.out_dropout = 0.1
  args.clip = 0.8
  args.num_epochs = 11 * args.train_chunk


def _cfg_mosi(args):
  """Apply the hard-coded MOSI settings (scalar regression, L1 loss)."""
  args.orig_d_l, args.orig_d_a, args.orig_d_v = 300, 5, 20
  args.output_dim = 1
  args.criterion = 'L1Loss'

  args.kernel_size_a = 1
  args.kernel_size_v = 1
  args.kernel_size_l = 0
  if not args.aligned:
    args.batch_chunk = 8
    args.train_chunk = 1
    args.es = 1.
    args.model = 'mult'
    args.fusion_type = 'ta'
    args.fusion_layer_type = 'cal'
    args.kernel_size = 1
    args.dims_per_head = 4
    args.num_heads = 10
    args.l_len, args.a_len, args.v_len = 50, 375, 500
  else:
    args.batch_chunk = 1
    args.train_chunk = 1
    args.es = 1.
    args.model = 'cdcn'
    args.fusion_type = 'mc'
    args.fusion_layer_type = 'cal'
    args.kernel_size = 1
    args.dims_per_head = 12
    args.num_heads = 2
    args.l_len, args.a_len, args.v_len = 50, 50, 50
  args.when = 10
  # Early-stop multiplier stretches the decay schedule (es is 1.0 here).
  args.when *= args.es
  args.batch_size = 128
  args.lr = 6e-3
  args.optim = 'Adam'
  args.nlayers = 4
  args.embed_dropout = 0.29
  args.attn_dropout = 0.23
  args.attn_dropout_l = 0.08
  args.attn_dropout_a = 0.30
  args.attn_dropout_v = 0.28
  args.out_dropout = 0.13
  args.clip = 0.8
  args.num_epochs = 50 * args.train_chunk


def _cfg_mosi_new(args):
  """Apply the hard-coded settings for the re-extracted MOSI features."""
  args.orig_d_l, args.orig_d_a, args.orig_d_v = 300, 74, 35
  args.output_dim = 1
  args.criterion = 'L1Loss'
  args.kernel_size_a = 1
  args.kernel_size_v = 1
  args.kernel_size_l = 1
  if not args.aligned:
    args.batch_chunk = 8
    args.train_chunk = 1
    args.es = 1.
    args.model = 'mult'
    args.fusion_type = 'mc'
    args.fusion_layer_type = 'cil'
    args.kernel_size = 1
    args.dims_per_head = 4
    args.num_heads = 10
    args.l_len, args.a_len, args.v_len = 50, 500, 500
  else:
    args.batch_chunk = 1
    args.train_chunk = 1
    args.es = 1.
    args.model = 'mult'
    args.fusion_type = 'mc'
    args.fusion_layer_type = 'cil'
    args.kernel_size = 1
    args.dims_per_head = 4
    args.num_heads = 10
    args.l_len, args.a_len, args.v_len = 50, 50, 50
  args.when = 10
  args.when *= args.es
  args.batch_size = 128
  args.lr = 1e-3
  args.optim = 'Adam'
  args.nlayers = 4
  args.embed_dropout = 0.2
  args.attn_dropout = 0.2
  args.attn_dropout_l = 0.2
  args.attn_dropout_a = 0.2
  args.attn_dropout_v = 0.2
  args.out_dropout = 0.1
  args.clip = 0.8
  args.num_epochs = 50 * args.train_chunk


def _cfg_mosei_senti(args):
  """Apply the hard-coded MOSEI sentiment settings (scalar regression)."""
  args.orig_d_l, args.orig_d_a, args.orig_d_v = 300, 74, 35
  args.output_dim = 1
  args.criterion = 'L1Loss'

  args.kernel_size_a = 1
  args.kernel_size_v = 1
  args.kernel_size_l = 0
  if not args.aligned:
    args.batch_chunk = 1
    args.kernel_size = 2
    args.model = 'mult'
    args.fusion_type = 'mc'
    args.fusion_layer_type = 'cil'
    args.l_len, args.a_len, args.v_len = 50, 500, 500
  else:
    args.kernel_size = 7
    args.model = 'mult'
    args.fusion_type = 'mc'
    args.fusion_layer_type = 'cil'
    args.l_len, args.a_len, args.v_len = 50, 50, 50
  args.train_chunk = 1
  args.when *= args.es
  args.batch_size = 16
  args.lr = 1e-3
  args.optim = 'Adam'
  args.nlayers = 4
  args.dims_per_head = 5
  args.num_heads = 8
  args.embed_dropout = 0.3
  args.attn_dropout = 0.1
  args.attn_dropout_l = 0.1
  args.attn_dropout_a = 0.1
  args.attn_dropout_v = 0.1
  args.out_dropout = 0.1
  args.clip = 1.0
  args.num_epochs = 12 * args.train_chunk


def _cfg_mosei_full(args):
  """Apply the full-MOSEI settings (emotion classes or scalar sentiment)."""
  args.orig_d_l, args.orig_d_a, args.orig_d_v = 300, 74, 35
  if args.dataset == 'mosei_full_emo':
    args.output_dim = 12
    args.criterion = 'CrossEntropyLoss'
  else:
    args.output_dim = 1
    args.criterion = 'L1Loss'
  args.kernel_size_a = 1
  args.kernel_size_v = 1
  args.kernel_size_l = 1
  if not args.aligned:
    args.batch_chunk = 1
    args.kernel_size = 16
    args.lr = 0.00009
    args.attn_dropout = 0.09
    args.embed_dropout = 0.27
    args.out_dropout = 0.16
    args.model = 'mult'
    args.fusion_type = 'mc'
    args.fusion_layer_type = 'cil'
    args.l_len, args.a_len, args.v_len = 50, 500, 500
  else:
    args.kernel_size = 9
    args.lr = 0.00025
    args.attn_dropout = 0.13
    args.embed_dropout = 0.18
    args.out_dropout = 0.21
    args.model = 'mult'
    args.fusion_type = 'mc'
    args.fusion_layer_type = 'cil'
    args.l_len, args.a_len, args.v_len = 50, 50, 50
  args.train_chunk = 1
  args.when *= args.es
  args.batch_size = 16
  args.optim = 'Adam'
  args.nlayers = 4
  args.dims_per_head = 5
  args.num_heads = 8
  args.attn_dropout_l = 0.1
  args.attn_dropout_a = 0.1
  args.attn_dropout_v = 0.1
  args.clip = 1.0
  args.num_epochs = 20 * args.train_chunk


def hyparams(argv=None):
  """Parse CLI flags and overlay the per-dataset hyper-parameter presets.

  Args:
    argv: optional list of argument strings; None (the default) reads
      sys.argv as before, so existing callers are unaffected.

  Returns:
    argparse.Namespace with the parsed flags plus derived attributes
    (orig_d_*, output_dim, criterion, *_len, ...) for the chosen dataset.
    args.model is upper-cased and args.dataset lower-cased on return.
  """
  args = _build_parser().parse_args(argv)

  # Normalize BEFORE dispatch so e.g. '--dataset IEMOCAP' also matches;
  # previously normalization happened after, silently skipping the preset.
  args.dataset = args.dataset.strip().lower()

  if args.dataset == 'iemocap':
    _cfg_iemocap(args)
  elif args.dataset == 'mosi':
    _cfg_mosi(args)
  elif args.dataset == 'mosi_new':
    _cfg_mosi_new(args)
  elif args.dataset == 'mosei_senti':
    _cfg_mosei_senti(args)
  elif args.dataset in ('mosei_full_emo', 'mosei_full_senti'):
    _cfg_mosei_full(args)
  # Unknown datasets fall through with the raw CLI values.

  args.model = args.model.strip().upper()
  return args
